State Results

# clear labels for versions
versions <- tibble(
  version = c("v1", "v2", "v3", "v4", "v5", "v6", "v7"),
  vlabel = factor(
    c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
    "$\\beta$ Centered at Survey Value",
    "$Pr(S_1|untested)$ Centered at Survey Value",
    "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
        "$\\beta$ Centered at Survey Value (Spline Smoothed)",
         "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
        "$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
    ),
    levels = c(
        "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", 
        "$\\beta$ Centered at Survey Value",
         "$Pr(S_1|untested)$ Centered at Survey Value",
        "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
        "$\\beta$ Centered at Survey Value (Spline Smoothed)",
         "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values ($\\beta$ Spline Smoothed)",
        "$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$"
    ) )
)


# read state-level results
targets_dir <- here("_targets")

state_res <- tar_read(state_v1, 
                   store =targets_dir) %>% 
  bind_rows(
    tar_read(state_v2,
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(state_v3, 
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(state_v4, 
             store =targets_dir)
   )%>%
   bind_rows(
    tar_read(state_v5,
             store =targets_dir)
  )%>%
   bind_rows(
    tar_read(state_v6,
             store =targets_dir)
  ) %>%
   bind_rows(
    tar_read(state_v7,
             store =targets_dir)
  )

covidestim <- tar_read(covidestim_biweekly_state,
                        store =targets_dir)

dates <- readRDS(here("data/data_raw/date_to_biweek.RDS"))
codes <- read_csv(here('data/demographic/statecodes.csv'))

# last biweek for first model is biweek 24
last_biweek_old_model <- dates %>% 
  group_by(biweek) %>%
  summarize(mindate=min(date),maxdate=max(date))  %>%
  filter("2021-11-30" >= mindate & "2021-11-30" <= maxdate)


cat("Last two week interval with previous model was biweek", last_biweek_old_model$biweek,
    "\ndates", paste0("(", last_biweek_old_model$mindate, ", ",
                    last_biweek_old_model$maxdate, ")"))
## Last two week interval with previous model was biweek 24 
## dates (2021-11-19, 2021-12-02)
end_old_model <- last_biweek_old_model$maxdate


state_res <- state_res %>%
  rename(state=fips) %>%
  left_join(versions) %>%
  mutate(state=toupper(state)) %>%
  left_join(covidestim, relationship= "many-to-many") %>%
  left_join(dates, relationship = "many-to-many")  %>%
  rename(name=state_name) %>%
  group_by(biweek) %>%
  mutate(mindate=min(date),
         maxdate=max(date)) %>%
  ungroup()


if(filter_versions){
  state_res <- state_res %>%
  filter(version %in% c("v3", "v5", "v6", "v7"))  %>%
    mutate(vlabel=case_when(
      version == "v7" ~  "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$",
      version == "v5" ~ "$\\beta$ Centered at Survey Value",
      version =="v6" ~  "$Pr(S_1|untested)$ and $\\beta$ Centered at Survey Values",
      version == "v3" ~ "$Pr(S_1|untested)$ Centered at Survey Value"
    ))

}
theme_c <- function(...){ 
   # font <- "Helvetica"   #assign font family up front
    theme_bw() %+replace%    #replace elements we want to change
    
    theme(
      
      
      #text elements
      plot.title = element_text(             #title
                   size = 16,                #set font size
                   face = 'bold',            #bold typeface
                   hjust = .5,
                   vjust = 3),               
      
      plot.subtitle = element_text(          #subtitle
                   size = 12,
                   hjust = .5,
                   face = 'italic',
                   vjust = 3),               #font size
      
      axis.title = element_text(             #axis titles
                   size = 12),               #font size
      
      axis.text = element_text(              #axis text
                   size = 9),
      legend.text = element_text(size = 12),
      # t, r, b, l
      plot.margin = unit(c(1,.5,.5,.5), "cm"),
      legend.position = "right",
      strip.text.x = element_text(size = 11, face = "bold", color="white"),
      strip.text.y = element_text(size = 11, face = "bold", color="white",angle=270),
      strip.background = element_rect(fill = "#3E3D3D")
      ) %+replace%
      theme(...)
   
}

Summarizing Concordance with Covidestim

# corrected %>%
#   mutate(in_interval = case_when(
#     exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
#     exp_cases_lb  > infections  ~ "Below Interval",
#     exp_cases_ub < infections ~ "Above Interval"
#   )) %>%
#   filter(!is.na(in_interval)) %>%
#   group_by(in_interval, state, vlabel) %>%
#   summarize(n_per_county=n()) %>%
#   group_by(vlabel, in_interval) %>%
#   summarize(n_per_version = sum(n_per_county)) %>%
#   group_by(vlabel) %>%
#   mutate(total = sum(n_per_version)) %>%
#   ungroup() %>%
#   mutate(prop=n_per_version/total)



by_version <- state_res %>%
 # filter(biweek >=6) %>% 
  select(-date) %>%
  distinct() %>% 
  mutate(in_interval = case_when(
    exp_cases_lb <= infections & infections <= exp_cases_ub ~ "In Interval",
    exp_cases_lb  > infections  ~ "Below Interval",
    exp_cases_ub < infections ~ "Above Interval"
  )) %>%
  filter(!is.na(in_interval)) %>%
  group_by(in_interval, vlabel) %>%
  summarize(n=n()) %>%
  group_by(vlabel) %>%
  mutate(total = sum(n)) %>%
  mutate(prop=n/total) 

labels <- by_version %>%
  arrange(prop) %>%
  pull(vlabel) %>%
  as.character() %>%
  unique()


by_version %>%
  mutate(in_interval = factor(in_interval, 
                             levels = c("Above Interval",
                                        "In Interval",
                                        "Below Interval"
                                        ))) %>%
   ggplot(aes(x= fct_reorder(vlabel,prop,max), 
             fill = in_interval, y =prop)) + 
  geom_bar(stat="identity",position="stack") +
  theme_c() +
  coord_flip()+ scale_fill_manual(values=c("In Interval" = "#79D2AF",
                             "Above Interval" = "#D2AF79",
                      "Below Interval"="#7997D2"),
                       breaks = c("Below Interval",
                               "In Interval",
                               "Above Interval")) +
  labs(x="", 
       y= "Proportion",
       fill = "Covidestim Median:",
       title = "Proportion of Simulation Intervals Where Covidestim Median\nWas Within, Above, or Below the Median") +
  theme_c() +
  theme_c(legend.position="right",
          legend.title = element_text(face="bold", size = 15),
          legend.text= element_text( size = 15),
          legend.spacing.y = unit(3, 'pt'),
          axis.text.y = element_text(size = 17, hjust=1)) +
  theme(plot.title = element_text(size = 20),
          plot.subtitle = element_text(size=18, 
                                       face='italic', 
                                       margin=margin(4,0,4,0)),
        axis.text.x=element_text(size=12),
        axis.title = element_text(size=14)) +
  scale_x_discrete(labels = (TeX(labels)))

ggsave(here('figures/concordance-covidestim-state.pdf'))
by_version %>%
  group_by(vlabel) %>%
  filter(in_interval == "In Interval") %>%
  mutate(n_interval = n) %>%
  ggplot(aes(y=fct_reorder(vlabel,prop), x=prop)) +
  geom_text(aes(label=round(prop,3), x= prop+.03), 
             angle=0) +
  geom_bar(stat="identity", fill="#596281") +
  scale_y_discrete(breaks= unique(by_version$vlabel),
                   labels=function(x)TeX(x)) +
  scale_x_continuous(expand=c(0,0), 
                     limits=c(0,.9), n.breaks=7)  +
  theme_c(axis.text.y = element_text(size = 13, hjust=1),
          axis.title.x = element_text(size=14),
          axis.text.x=element_text(size=10)) +
  labs(y="",
       x="Proportion Containing the Covidestim Median",
       title="Proportion Containing the Covidestim Median",
       subtitle = "By Implementation")

Simulation Intervals Over Time

all_versions <- state_res %>% 
  group_by(state) %>%
  summarize(state_n=n_distinct(version)) %>%
  filter(state_n == max(state_n)) %>%
  pull(state)
state_res %>%
   filter(state %in% sample(unique(all_versions),5))  %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub),
             alpha = .8,
             show.legend=FALSE,
             fill="#596281") +
   geom_linerange(aes(xmin = mindate,
                      xmax=maxdate,
                      y = positive,
                      color = 'obs'),
                  size=.5) +
  facet_grid(name~vlabel, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed)),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "By State") +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

Compare Versions

All Versions

subset <- state_res %>%
  filter(vlabel ==  "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$") 

# pal <- c("#247C90", "red")
# 
# names(pal) <- c(as.character(unique(subset$vlabel)), "Covidestim")


state_res %>%
   filter(state %in% sample(unique(all_versions),5))  %>% 
  filter(version %in% c("v2", "v5", "v6", "v7")) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill=vlabel),
             alpha = .8,
             show.legend=FALSE #,
           #  fill="#596281"
             ) +
   geom_linerange(aes(xmin = mindate,
                      xmax=maxdate,
                      y = positive,
                      color = 'obs'),
                  size=.5) +
  facet_grid(name~vlabel, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed)),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  scale_fill_manual(values = pal)  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "By State") +
  scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
                     name = '') +
  guides(color = guide_legend(override.aes = list(linewidth =2)))

Wu et al.’s Priors versus CTIS Priors (that do not vary)

colors_labs <- state_res %>%
  filter(version %in% c("v1","v7")) %>%
  pull(vlabel) %>% unique()

state_res %>%
  filter(version %in% c("v1","v7") & state %in% sample(unique(.$state),15)) %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill=vlabel),
             alpha = .6
             ) +
  facet_wrap(~name, 
             labeller=labeller(vlabel =as_labeller(
               TeX, default=label_parsed),
               nrow=3),
             scales="free_y") +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(
          legend.position = "top",
          ) +
  viridis::scale_fill_viridis(option="magma", begin=.1,end=.9, discrete=TRUE,
                              breaks =(colors_labs),
                              labels=TeX(as.character(colors_labs)))  +
  labs(x="",
       y = "Estimated Infections",
       title = "Simulation Intervals Over Time",
       subtitle = "By State",
       fill = "") 

Compare States

# comp_state_pal <- c("#247C90", "red")
# 
# names(comp_state_pal) <- c('Probabilistic Bias Analysis', "Covidestim")
# 
# 
# vlab_for_pal <- unique(subset$vlabel) %>% as.character()

color_for_bias <- pal[[which(names(pal)=="$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$")]]

comp_state_pal<- c(color_for_bias,"red")
names(comp_state_pal) <- c('Probabilistic Bias Analysis', "Covidestim")


subset %>%
   # mutate(vlabel = gsub("$\\,$CTIS Priors that Do Not Vary by State or Date$\\,$",
   #                     "$Priors\\,Do\\,Not\\,Vary\\,by\\,State\\,and\\,Date$", as.character(vlabel))) %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill = 'Probabilistic Bias Analysis'),
             alpha = .8,
             show.legend=FALSE) +
  geom_ribbon(aes(x = date,
                  fill = 'Covidestim',
                  ymin = infections.lo,
                  ymax = infections.hi),
              alpha = .6) +
  geom_vline(xintercept=end_old_model,
             linetype=2,
             color="darkred",
             linewidth=.5) +
  facet_wrap(~name, ncol=4, scales = "free",
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3.5 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x=element_text(size=7),
          axis.text.y = element_text(size=4.5),
          legend.position = "top",
          legend.text=element_text(size=12),
          strip.text.x=element_text(size=11,
                                    face='plain',
                                    color='white')) +
  scale_fill_manual(values =comp_state_pal, name='')  +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by State")) +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))

ggsave(here('figures/intervals-and-covidestim-all-states.pdf'))

Ratio of Estimated to Observed

subset %>% 
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb/positive,
             ymax = exp_cases_ub/positive,
             fill = vlabel),
             alpha = .9,
             show.legend=FALSE,
             fill= "#247C90") +
  facet_wrap(~state, ncol=5, 
              labeller= labeller(vlabel =as_labeller(TeX, default=label_parsed))) +
  scale_y_continuous(labels = comma) +
  scale_x_date(date_breaks = "3 months", 
               date_labels = "%b %Y") +
  theme_c(axis.text.x=element_text(size=6),
          axis.text.y = element_text(size=8),
          legend.position = "top") +
  scale_fill_manual(values =pal,
                    labels = TeX(names(pal)),
                    name='',
                    breaks="Covidestim")  +
  labs(y = "Ratio of Estimated Infections to Observed",
       x="",
       title = paste0("Ratio of Estimated Infections to Observed by State")) +
  guides(color = guide_legend(override.aes = list(linewidth =2),
                              nrow=2,
                              byrow=TRUE))

ggsave(here('figures/ratio-estimated-to-observed-all-states.pdf'))

Comparing States at Specific Two-week Intervals

During Alpha Wave

max_val <- subset %>% 
  # left_join(codes) %>%
  filter(biweek %in% c(7, 13,27)) %>%
  mutate(ratio=exp_cases_ub/positive) %>%
  pull(ratio) %>%
  max()
  
  
plt1 <- subset %>% 
  # left_join(codes) %>%
  filter(biweek == 7) %>%
  ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
             xmin = exp_cases_lb/positive,
             xmax = exp_cases_ub/positive)) +
  geom_errorbar() +
  # geom_point(aes(y=state_name,x=exp_cases_median/positive),
  #            color = "darkred", size=.8, alpha=.5) +
  theme_c() + 
  theme(axis.text.y=element_text(size=10),
        axis.text.x = element_text(size=11),
        axis.title = element_text(size = 17),
        plot.subtitle = element_text(size=14),
        plot.title = element_text(size=17, face="plain")) +
  labs(y="", x="Ratio of Estimated Infections\nto Observed",
       title ="During Two-week Period\nin the Alpha Wave",
       subtitle = "March 26, 2021 through April 8, 2021") +
  xlim(0,max_val+2)

plt1

During Delta Wave

subset %>%
  mutate(ratio = exp_cases_median/positive) %>%
  group_by(biweek) %>%
  summarize(mean = mean(ratio),
            mindate=min(date),
            maxdate=max(date)) %>%
  arrange(desc(mean)) 
plt2 <- subset %>% 
  # left_join(codes) %>%
  filter(biweek == 13) %>%
  ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
             xmin = exp_cases_lb/positive,
             xmax = exp_cases_ub/positive)) +
  geom_errorbar() +
  # geom_point(aes(y=state_name,x=exp_cases_median/positive),
  #            color = "darkred", size=.8, alpha=.5) +
  theme_c() + 
 theme(axis.text.y=element_text(size=10),
        axis.text.x = element_text(size=11),
        axis.title = element_text(size = 17),
        plot.subtitle = element_text(size=14),
        plot.title = element_text(size=17, face="plain")) +
  labs(y="", x="Ratio of Estimated Infections\nto Observed",
       title ="During Two-week Period\nin the Delta Wave",
       subtitle = "June 18, 2021 through July 1, 2021") +
  xlim(0,max_val+2)

plt2

During Omicron Wave

subset %>% 
  left_join(codes) %>%
  filter(biweek == 27) %>%
  pull(date) %>% 
  range()


subset %>% 
  left_join(codes) %>%
  filter(biweek == 13) %>%
  pull(date) %>% 
  range()


plt3 <- subset %>% 
  # left_join(codes) %>%
  filter(biweek == 27) %>%
  ggplot(aes(y =fct_reorder(name,exp_cases_median/positive),
             xmin = exp_cases_lb/positive,
             xmax = exp_cases_ub/positive)) +
  geom_errorbar() +
  theme_c() + 
  theme(axis.text.y=element_text(size=10),
        axis.text.x = element_text(size=11),
        axis.title = element_text(size = 17),
        plot.subtitle = element_text(size=14),
        plot.title = element_text(size=17, face="plain")) +
  labs(y="", x="Ratio of Estimated Infections\nto Observed",
       title ="During Two-week Period\nin the Omicron Wave",
       subtitle = "December 31, 2021 through January 14, 2022") +
  xlim(0,max_val+2)

plt3

All Waves Together

plt <- plot_grid(plt1,plt2, plt3, nrow=1, align="none", labels="auto", label_size=19)

title <- ggplot() +
  labs(title = "Ratio of Estimated Infections to Observed Infections") +
  theme_c(plot.title=element_text(size=25))

plot_grid(title, plt, nrow=2, 
          rel_heights= c(.05,.95))

ggsave(here('figures/ratio-estimated-to-observed-multiple-waves.pdf'))
cat("Time interval with the highest ratio of estimated cases to observed: ")
## Time interval with the highest ratio of estimated cases to observed:
subset %>%
  group_by(biweek) %>%
  mutate(ratio=exp_cases_median/positive) %>%
  summarize(mean_ratio=mean(ratio),
         date = paste(range(date),collapse=" to ")) %>%
  slice_max(n=1, order_by=mean_ratio)
subset %>% 
  # left_join(codes) %>%
  filter(biweek %in% c(7, 13, 27))  %>%
  select(-date) %>%
  distinct() %>%
  group_by(state) %>%
  mutate(ord = empirical_testpos[which(biweek==7)]) %>%
  mutate(biweek=factor(biweek)) %>%
  ggplot(aes(y=fct_reorder(state,ord),
             x=empirical_testpos,fill=biweek)) +
  geom_bar(stat="identity",position="dodge")+
  theme_c() +
  theme_c() +
  labs(title = "Testing Positivity by State",
       y="",
       x="Biweekly Testing Positivity",
       fill="") +
  scale_x_continuous(n.breaks=10)

subset %>% 
  # left_join(codes) %>%
  filter(biweek %in% c(7, 13, 27))  %>%
  group_by(biweek) %>%
  mutate(daterange= paste(format(min(date), "%B %d %Y"),
                          "to", 
                          format(max(date), "%B %d %Y")),
         daterange=factor(
           daterange,
           levels=c(
             "March 26 2021 to April 08 2021",
             "June 18 2021 to July 01 2021",
             "December 31 2021 to January 14 2022"))) %>%
  select(-date) %>%
  distinct() %>%
  group_by(state) %>%
  mutate(testrate=total/population,
         ord = testrate[which(biweek==7)]) %>%
  mutate(biweek=factor(biweek)) %>%
  ggplot(aes(y=fct_reorder(name,ord,.desc=TRUE),
             x=testrate,fill=daterange)) +
  geom_bar(stat="identity",position="dodge") +
  theme_c() +
  labs(title = "Testing Rate by State",
       y="",
       x="Biweekly Testing Rate",
       fill="") +
  scale_x_continuous(n.breaks=10)

compare_testrates <- subset %>%
  mutate(testrate=total/population) %>%
  select(-date) %>%
  distinct() %>%
  filter(biweek %in% c(7, 13, 27))  %>%
  select(biweek,state,testrate) %>%
  pivot_wider(names_from=biweek, values_from=testrate, names_prefix="biweek_") %>%
  mutate(omicron_over_alpha = biweek_27/biweek_7,
         omicron_over_delta = biweek_27/biweek_13) %>%
  select(state,contains("omicron")) %>%
  pivot_longer(c(omicron_over_alpha,omicron_over_delta)) %>%
  group_by(name) %>%
  summarize(mean_val = mean(value))

cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_alpha") %>% pull(mean_val), "times the testing rate during this time interval in the alpha wave.") 
## The testing rate during this two-week interval during the omicron wave was 2.440557 times the testing rate during this time interval in the alpha wave.
cat("The testing rate during this two-week interval during the omicron wave was", compare_testrates %>% filter(name=="omicron_over_delta") %>% pull(mean_val), "times the testing rate during this time interval in the delta wave.") 
## The testing rate during this two-week interval during the omicron wave was 4.931176 times the testing rate during this time interval in the delta wave.

Rank Ratio of Estimated to Observed Over Time

ranks <- subset %>%
  mutate(ratio = exp_cases_median/positive,
         tested = total/population) %>%
  # one observation per biweek per state
  select(biweek, date, ratio, name,tested) %>%
  group_by(biweek,ratio,name,tested) %>%
  summarize(date=min(date)) %>%
  # rank for each time interval
  group_by(biweek) %>%
  mutate(rank = rank(ratio),
         rank_tested=rank(-tested))


r <- ranks %>%
  mutate(rank_group = case_when(
    rank <= 10 ~  "Top 10",
    rank > 41 ~ "Bottom 10",
    TRUE ~ "Middle" )) %>%
  group_by(name, rank_group) %>%
  mutate(n=n()) %>%
  group_by(name) %>%
  mutate(total =n(),
         max_group = rank_group[which.max(n)]) %>%
  filter( max(n) >24) %>%
  filter(max_group != "Middle")  %>%
  ungroup()


top_10 <- r %>% filter(rank_group=="Top 10") %>%
  arrange(desc(n)) %>% pull(name) %>%
  unique() %>%paste(collapse=", ")

cat("States that consistently have the lowest ratios:", top_10)
## States that consistently have the lowest ratios: Rhode Island, Massachusetts, District of Columbia, Alaska, New York, Vermont
bottom_10 <- r %>% filter(rank_group=="Bottom 10") %>%
  arrange(desc(n)) %>% pull(name) %>%
  unique() %>%paste(collapse=", ")


cat("States that consistently have the highest ratios:", bottom_10)
## States that consistently have the highest ratios: Mississippi, South Dakota, Oklahoma, Nebraska, Tennessee
end_labs <- r %>%
  ungroup() %>%
  filter(date==max(date)) %>%
  mutate(date = date + days(10)) %>%
  select(name, rank, date) %>%
  ungroup()
##############################
# RANK PLOT OVER TIME
##############################
# 
# set.seed(999)
# 
# pal1 <-viridis_pal(option="viridis", end = .9, begin=.3)(3)
# pal2 <- viridis_pal(option="rocket", end = .9, begin=.3)(2)
# pal3 <- viridis_pal(option="magma", end = .9, begin=.3)(2)
# pal4 <- viridis_pal(option="mako", end = .8, begin=.3)(2)
# pal5 <- viridis_pal(option="cividis", end = .9, begin=.1)(3)
# 
# 
# rankpal <- c(pal1, pal2, pal3,pal4,pal5)
# indexes <- sample.int(length(rankpal), replace=FALSE)
# rankpal <- rankpal[indexes]


set.seed(123)

rankpal <- c("#b4ddd4", "#7b2c31", "#65d04b",
             "#da239b", "#b7d165", "#453fbc", 
             "#658114", "#af3fe8", "#ffb947",
             "#0a60a8", "#f87945", "#16d0ae")

rankpal <- c("#851657", "#be3acd", "#19a71f",
             "#824598", "#ed820a", "#BB8E37",
             "#974810", "#1f84ec", "#d02023",
             "#059dc5", "#f23387", "#16d0ae")

indexes <- sample.int(length(rankpal), replace=FALSE)
rankpal <- rankpal[indexes]



r %>%
 # filter(rank_group!="Middle") %>%
  ggplot() +
  geom_point(aes(x=date,y=rank, 
             group=name,
             color= name),
             size=.5) +
  geom_line(aes(x=date,y=rank, 
             group=name,
             color= name)) +
 # facet_wrap(~max_group) +
  ggrepel::geom_text_repel( aes(x= date-days(4),
                y = rank,
                color=name,
                label = name),
           end_labs,
           min.segment.length=2,
           nudge_y=0,
           hjust=0,
           size=3,
           direction="y",
           force_pull=9,
           box.padding=.15) +
  theme_c(legend.position="none",
          axis.text.x = element_text(angle =0,size=14)) +
  # theme(plot.subtitle =element_text(size=18),
  #       axis.title.x=element_text(size=16),
  #       axis.title.y=element_text(size=16)) +
 # scale_color_brewer(palette="Dark2")  +
  scale_color_manual(values=rankpal) +
  scale_x_date(date_breaks="2 months", date_labels = "%b %Y",
               limits = c(ymd("2021-01-01"),
                          ymd("2022-04-15"))) +
  labs(y = "Rank of Ratio",
       title = "Rank of Estimated Infections to Observed Infections Over Time",
       subtitle = "For States Ranked in the Top or Bottom 10\nfor More Than 80% of Time Intervals",
       x="Date") +
  scale_y_reverse(breaks=seq(1,51, by=9))

Summer Testing Rate

subset %>% 
  mutate(testrate = total/population) %>%
  group_by(biweek) %>% 
  mutate(m = median(testrate),
            date=min(date)) %>%
  ggplot(aes(x=date,y=testrate, group=state)) +
  geom_line(alpha=.2) +
  geom_line(aes(y=m, color='Median'), size=2) +
  labs(y = "Total Number of Tests\nOver Population Size",
       title ="Testing Rate Over Time",
       x="") +
  theme_c() +
  theme(axis.title.x = element_text(size=16),
          axis.title.y  = element_text(size=16),
        legend.position='top') +
  geom_vline(xintercept = ymd("2021-07-16"), linetype = 2, linewidth=.5) +
  geom_vline(xintercept = ymd("2021-06-18"), linetype = 2, linewidth=.5) +
  scale_x_date(date_breaks="2 months",
               date_labels = "%b %Y") +
  scale_color_manual(values=c(Median="darkred"),name='')

ggsave(here('figures/testrate-low-summer.pdf'), height=6,width=12, dpi=400)

Variant Data

m <- state_res %>%
  filter(state == "MI" & version=="v7") %>%
  pull(exp_cases_ub) %>%
  max()


variant <- tar_read(variant, store =targets_dir)

varpal <- c("#7BD8DA","#709f0f", "#e71761", "#9245c8", 
            "#97481c", "#5054b1", "#DA7BA8", "#26B392")

state_res %>%
  filter(state == "MI" & version %in% c("v7")) %>%
 # filter(state %in% sample(corrected$state,5)) %>%
 # filter(biweek >=6) %>%
  group_by(biweek) %>%
  mutate(xmin = min(date),
         xmax = max(date)) %>%
  ungroup() %>%
   ggplot() +
  geom_ribbon(aes(x = date, 
             ymin = exp_cases_lb,
             ymax = exp_cases_ub,
             fill =vlabel),
           #  fill = "#3E3D3D",
             alpha = .5,
             show.legend=FALSE) +
  scale_x_date(date_breaks = "2 months", 
               date_labels = "%b %Y") +
  theme_c(legend.position="top") +
  geom_line(aes(x=week, y=share*m,
                color =variant_category,
                group=variant_category),
            data=variant,
            linewidth=1.3) +
  # scale_fill_manual(values =pal,
  #                   labels = TeX(names(pal)),
  #                    breaks='Covidestim',
  #                   name='')  +
  scale_y_continuous(sec.axis = sec_axis( trans=~./m, name="Variant Proportion"),
                     labels = comma) +
  labs(y = "Estimated Infections",
       title = paste0("Estimated Infections by Version in Michigan"),
       color = "SARS-CoV-2 Variant",
       x="Date") +
  guides(color = guide_legend(override.aes = list(linewidth =3),
                              ncol=2,title.position="top")) +
 # scale_color_brewer(palette="Accent") 
  scale_color_manual(values=varpal) +
  scale_fill_manual(values=pal)

ggsave(here('figures/michigan_variant.pdf'), dpi=400)

Comparing Overlap

Weaknesses in this approach:

  • it doesn’t distinguish between a very small interval being covered (i.e. when there are very few cases) and a larger interval being covered (i.e. when there are many cases)
  • it doesn’t consider the width of the PB interval
state_overlaps <- subset %>%
  select(-date) %>%
  distinct() %>%
  mutate(lower_overlap = map2_dbl(infections.lo, exp_cases_lb, ~max(.x,.y)),
         upper_overlap = map2_dbl(infections.hi, exp_cases_ub, ~min(.x,.y)),
         overlap = case_when(
           upper_overlap-lower_overlap > 0 ~   upper_overlap-lower_overlap,
           TRUE ~ 0
         )) %>%
  select(contains('overlap'), name, biweek, 
         population, positive, total, exp_cases_lb,
         exp_cases_ub, contains('infections')) %>%
  mutate(overlap_not_norm = overlap,
         overlap = overlap/population,
         fraction_overlap = overlap_not_norm/(infections.hi-infections.lo)) 



#############
# BY STATE
#############
state_overlaps %>%
  group_by(name) %>%
  summarize(mean_overlap=mean(fraction_overlap)) %>%
  ggplot(aes(y= fct_reorder(name, mean_overlap),
             x =  mean_overlap)) +
  geom_bar(stat="identity") +
  theme_c() +
  labs(y="",
       x="Mean Overlap",
       title = "Mean Overlap with Covidestim by State",
       subtitle = "Overlap Normalized by Width of Covidestim Interval")

##########################
# BY TWO-WEEK INTERVAL
##########################
state_overlaps %>%
  group_by(biweek) %>%
  summarize(mean_overlap=mean(fraction_overlap)) %>%
  left_join(dates) %>%
  group_by(biweek) %>%
  summarize(date = median(date),
            mean_overlap=unique(mean_overlap)) %>%
  ggplot(aes(x=date,
             y =  mean_overlap)) +
  geom_bar(stat="identity", alpha =.7) +
  geom_vline(xintercept=end_old_model,linetype=2,
             color='darkred',
             linewidth=1.1) +
  theme_c() +
  labs(y="",
       x="Mean Overlap",
       title = "Mean Overlap with Covidestim by Time Interval",
       subtitle = "Overlap Normalized by Width of Covidestim Interval") +
  scale_x_date(date_breaks="2 months", date_labels="%b %Y")

##########################
# BY TWO-WEEK INTERVAL
##########################
# state_overlaps %>%
#   mutate(posrate=positive/total) %>%
#   filter(name %in% sample(unique(state_overlaps$name), 5)) %>%
#   ggplot(aes(x=posrate, y = fraction_overlap)) +
#   geom_point(alpha=.8) +
#   theme_c() +
#   # facet_wrap(~name, scales="free_y") +
#   labs(x = "Positivity Rate", y = "Overlap") +
#   scale_y_continuous(labels=comma)
set.seed(123)
state_sample <- sample(unique(state_overlaps$name), 16)

state_overlaps %>%
  mutate(posrate=positive/total) %>%
 # filter(name %in% state_sample) %>%
  ggplot(aes(x=posrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
  facet_wrap(~name, scales="free_y") +
  labs(x = "Positivity Rate", y = "Overlap",
       title = "Comparing Fraction of Interval Covered by Positivity Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

state_overlaps %>%
  mutate(posrate=positive/total) %>%
#  filter(name %in% state_sample) %>%
  ggplot(aes(x=posrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
 # facet_wrap(~name, scales="free_y") +
  labs(x = "Positivity Rate", y = "Overlap",
       title = "Comparing Fraction of Interval Covered by Positivity Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

state_overlaps %>%
  mutate(testrate=total/population) %>%
  filter(name %in% state_sample) %>%
  ggplot(aes(x=testrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
  facet_wrap(~name, scales="free") +
  labs( x= "Total Tests / Population Size", 
        y = "Fraction of Covidestim Interval\nOverlapping with the PB interval",
        title = "Comparing Fraction of Interval Covered by Testing Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

state_overlaps %>%
  mutate(testrate=total/population) %>%
  filter(name %in% state_sample) %>%
  ggplot(aes(x=testrate, y = fraction_overlap)) +
  geom_point(alpha=.8) +
  theme_c() +
  labs( x= "Total Tests / Population Size", 
        y = "Fraction of Covidestim Interval\nOverlapping with the PB interval",
        title = "Comparing Fraction of Interval Covered by Testing Rate",
        subtitle = "Each Point Corresponds to a State-Biweek")

Relationship Between Agreement and Cumulative Incidence (Pre-Omicron)

state_population_link <- "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/state/detail/SCPRC-EST2019-18+POP-RES.csv"
state_pop <- read_csv(state_population_link)
statecodes <- read_csv(here("data/demographic/statecodes.csv"))
  
state_pop <- state_pop %>%
    left_join(statecodes, by = c("NAME" = "state")) %>%
    select(population = POPESTIMATE2019,
           state = code,
           name = NAME) %>%
    filter(!is.na(state))
  
state_deaths <- tar_read(state_deaths, store=targets_dir)


set.seed(123)
state_sample <- sample(unique(state_deaths$state), 5)

state_deaths %>%
#  filter(state %in% state_sample) %>%
  ggplot(aes(x=date,y=deaths/cases)) +
  geom_line() +
  facet_wrap(~state) +
  theme_c() +
  geom_vline(xintercept=end_old_model, color='darkred', linetype=2)

comp_deaths <- state_deaths %>%
  mutate(omicron=ifelse(date <= mdy("12-02-2021"),
                        "pre_omicron", "omicron")) %>%
  group_by(state, omicron) %>%
  summarize(cumulative_deaths = sum(deaths),
            cumulative_cases= sum(cases)) %>%
  pivot_wider(names_from = omicron, 
              values_from = c(cumulative_deaths, cumulative_cases)) %>%
  mutate(pre_deaths_over_cases = cumulative_deaths_pre_omicron/cumulative_cases_pre_omicron,
         omicron_deaths_over_cases = cumulative_deaths_omicron/cumulative_cases_omicron) 



comp_deaths %>%
  select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
  pivot_longer(contains("over_cases")) %>%
  group_by(state) %>%
  mutate(pre = value[which(name=="pre_deaths_over_cases")]) %>%
  mutate(name = case_when(
    grepl("omicron", name) ~ "Omicron",
    grepl("pre", name) ~ "Before Omicron"
  )) %>%
  ggplot(aes(y=fct_reorder(state,pre),  x= value, fill=name)) +
  geom_bar(stat='identity', position='dodge') +
  theme_c(legend.title = element_text(face="bold", size=14)) +
  labs(x = TeX("$\\frac{Total\\,Deaths}{Total\\,Cases}$"),
       y="",
       fill = "Time Period",
       title = "Total Deaths Over Total Cases in the Time Period Before Omicron\n Compared to During Omicron") +
  scale_x_continuous(n.breaks=7)

comp_deaths %>%
  select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
  mutate(pct_change =( omicron_deaths_over_cases -pre_deaths_over_cases ) / pre_deaths_over_cases ) %>% 
  ggplot(aes(y=fct_reorder(state,pct_change),  x= pct_change)) +
  geom_bar(stat='identity', position='dodge') +
  theme_c(legend.title = element_text(face="bold", size=11),
          axis.text.x=element_text(size=14),
          plot.subtitle = element_text(size=16,
                                       face="plain", 
                                       margin=margin(0,0,4,0))) +
  labs(x = TeX("$\\frac{Total\\,Deaths}{Total\\,Cases}$"),
       y="",
       fill = "Time Period",
       title = TeX("Percent Change in $\\frac{Total\\,Deaths}{Total\\,Cases}$ from the Time Period Before Omicron"),
       subtitle="Compared to During Omicron") +
  scale_x_continuous(labels=scales::percent)

comp_deaths %>%
  select(state,pre_deaths_over_cases,omicron_deaths_over_cases) %>%
  mutate(pct_change =( omicron_deaths_over_cases - pre_deaths_over_cases ) / pre_deaths_over_cases ) %>%
  left_join(state_overlaps, by = c('state' = 'name')) %>%
  group_by(state) %>%
  summarize(mean_overlap=mean(fraction_overlap),
            pct_change=unique(pct_change)) %>%
  ggplot(aes(x=pct_change,y=mean_overlap)) +
  geom_point(size=2) +
  theme_c(  plot.subtitle = element_text(size=16,
                                       face="plain", 
                                       margin=margin(0,0,4,0))) +
  scale_x_continuous(labels=scales::percent) +
  labs(x="Percent Change", y = "Mean Overlap\n(By State)", 
        title = TeX("Relationship Between the Percent Change in $\\frac{Total\\,Deaths}{Total\\,Cases}$ from the Time Period Before Omicron"),
       subtitle="Compared to During Omicron versus the Mean Overlap")

comp_deaths %>%
  rename(name=state) %>%
  ungroup() %>%
  left_join(state_pop[,c("name", "population")]) %>%
  mutate(frac_infected = cumulative_cases_pre_omicron/population) %>%
  left_join(state_overlaps[,c("name","fraction_overlap", "biweek")]) %>%
  filter(biweek >= 25) %>%
  group_by(name) %>%
  summarize(mean_overlap=mean(fraction_overlap),
            frac_infected=unique(frac_infected)) %>%
  ggplot(aes(x=frac_infected, y =mean_overlap)) +
  geom_point() +
  theme_c()

covidestim_link <- "https://covidestim.s3.us-east-2.amazonaws.com/latest/state/estimates.csv"
  
covidestim <- read_csv(covidestim_link)
  
# join data from each source to include dates before and after 2021-12-02
covidestim <- covidestim %>%
    select(date, contains("infections"), state) %>%
    filter(year(date) > 2020) %>%
    mutate(week = week(date), year = year(date))


covidestim %>%
  group_by(state) %>%
  summarize(infections=sum(infections))%>%
  rename(name=state) %>%
  left_join(state_pop[,c("name", "population")]) %>%
  mutate(frac_infected = infections/population) %>%
  left_join(state_overlaps[,c("name","fraction_overlap", "biweek")]) %>%
  filter(biweek >= 25) %>%
  group_by(name) %>%
  summarize(mean_overlap=mean(fraction_overlap),
            frac_infected=unique(frac_infected)) %>%
  ggplot(aes(x=frac_infected, y =mean_overlap)) +
  geom_point() +
  theme_c()